drug_overdose = read_csv("./data/VSRR_Provisional_Drug_Overdose_Death_Counts.csv") %>%
janitor::clean_names()
state_level = c(state.name[1:8], "District of Columbia", state.name[9:32],"New York City", state.name[33:50])
drug_overdose_52 =
drug_overdose %>%
filter(!(state_name %in% c("United States"))) %>%
relocate(state_name) %>%
mutate(month = factor(month, levels = month.name), # change month and year to factor
year = factor(year),
state_name = factor(state_name, levels = state_level)) %>%
arrange(state_name) %>%
group_by(state_name, year) %>%
mutate(month = sort(month))
Above are same as in data_cleaning, delete later.
nyc_df =
drug_overdose_52 %>%
filter(state_name == "New York City")
missing drug type data in year: 2016, 2017
nyc_df %>%
ungroup() %>%
filter(year %in% c("2016","2017")) %>%
select(year, month, indicator, data_value, predicted_value)
## # A tibble: 288 × 5
## year month indicator data_value predicted_value
## <fct> <fct> <chr> <dbl> <dbl>
## 1 2016 January Natural & semi-synthetic opioids (T… NA NA
## 2 2016 January Natural, semi-synthetic, & syntheti… NA NA
## 3 2016 January Number of Drug Overdose Deaths 1118 1118
## 4 2016 January Number of Deaths 53516 NA
## 5 2016 January Cocaine (T40.5) NA NA
## 6 2016 January Methadone (T40.3) NA NA
## 7 2016 January Natural & semi-synthetic opioids, i… NA NA
## 8 2016 January Psychostimulants with abuse potenti… NA NA
## 9 2016 January Synthetic opioids, excl. methadone … NA NA
## 10 2016 January Heroin (T40.1) NA NA
## # … with 278 more rows
summarize_nyc_drug =
nyc_df %>%
ungroup() %>%
select(year, month, indicator, data_value) %>%
filter(!(indicator %in% c("Number of Deaths","Number of Drug Overdose Deaths","Percent with drugs specified"))) %>%
filter(!(year %in% c("2016","2017"))) %>%
mutate(indicator = as.factor(indicator)) %>%
mutate(
indicator = fct_reorder(indicator, data_value)
)
summarize_nyc_drug %>%
summarize(n = sum(data_value)) %>%
arrange(desc(n))
## # A tibble: 1 × 1
## n
## <dbl>
## 1 276444
summarize_nyc_drug %>%
ggplot(aes(x = indicator, y = data_value, fill = indicator)) +
geom_violin(alpha = 0.5) +
scale_x_discrete(labels = c("Psychostimulants \n with abuse potential \n (T43.6)", "Methadone \n (T40.3)", "Natural & \n semi-synthetic \n opioids \n (T40.2)", "Natural & semi-\n synthetic opioids, \n incl. methadone \n (T40.2, T40.3)", "Heroin \n (T40.1)", "Cocaine \n (T40.5)", "Synthetic opioids, \n excl. \n methadone \n (T40.4)", "Natural, \n semi-synthetic, & \n synthetic opioids, \n incl. methadone \n (T40.2-T40.4)", "Opioids \n (T40.0-T40.4,\n T40.6)")) +
guides(fill = guide_legend(nrow = 6, byrow = TRUE)) +
labs(
title = "NYC Number of Drug Overdose Deaths with Drug Categories (2015 - 2021)",
x = "Drug Categories",
y = "Number of Drug Overdose Deaths",
caption = "Data comes from VSRR_Provisional_Drug_Overdose_Death_Counts dataset."
)
nyc_drug_overdose_death_df =
nyc_df %>%
ungroup() %>%
select(year, month, indicator, data_value) %>%
filter(indicator %in% c("Number of Deaths", "Number of Drug Overdose Deaths")) %>%
pivot_wider(
names_from = indicator,
values_from = data_value
) %>%
janitor::clean_names() %>%
group_by(year, month) %>%
mutate(
percent_overdose_death = number_of_drug_overdose_deaths / number_of_deaths
)
nyc_drug_overdose_death_df %>%
ungroup() %>%
ggplot(aes(x = month, y = percent_overdose_death, color = year)) +
geom_point() +
geom_line(aes(group = year)) +
labs(
title = "Percent of Drug Overdoes Deaths over Total Number of Deaths by Year in NYC",
x = "Months",
y = "Percent of Drug Overdoes Deaths",
caption = "Data comes from VSRR_Provisional_Drug_Overdose_Death_Counts dataset from 2015 to 2021)."
)
nyc_drug_overdose_death_df %>%
ungroup() %>%
ggplot(aes(x = month, y = percent_overdose_death, group = NA)) +
geom_point() +
geom_line() +
facet_grid(.~ year) +
labs(
title = "Percent of Drug Overdoes Deaths over Total Number of Deaths by Year in NYC",
x = "Months",
y = "Percent of Drug Overdoes Deaths",
caption = "Data comes from VSRR_Provisional_Drug_Overdose_Death_Counts dataset from 2015 to 2021."
) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
summarize_nyc_drug %>%
ggplot(aes(x = year, y = data_value, color = indicator)) +
geom_point() +
geom_line(aes(group = indicator)) +
scale_color_viridis(discrete = TRUE, labels = c("Psychostimulants with abuse potential (T43.6)", "Methadone (T40.3)", "Natural & semi-synthetic opioids (T40.2)", "Natural & semi-synthetic opioids, incl. methadone (T40.2, T40.3)", "Heroin (T40.1)", "Cocaine (T40.5)", "Synthetic opioids, excl. methadone (T40.4)", "Natural, semi-synthetic, & synthetic opioids, incl. methadone (T40.2-T40.4)", "Opioids (T40.0-T40.4,T40.6)")) +
guides(color = guide_legend(nrow = 6, byrow = TRUE)) +
labs(
title = "Number of Drug Overdose Deaths with Drug Categories by Year in NYC",
x = "Drug Categories",
y = "Number of Drug Overdose Deaths",
caption = "Data comes from VSRR_Provisional_Drug_Overdose_Death_Counts dataset between 2015 and 2021."
)
ny_eco_df =
read_csv("./data/median_household_income_ny.csv") %>%
janitor::clean_names() %>%
select(year, household_income_by_race, household_income_by_race_moe, geography) %>%
filter(year >= "2015",
!(geography %in% c("New York-Newark-Jersey City, NY-NJ-PA", "New York"))) %>%
mutate(
geography = str_replace(geography, "New York, NY", "New York City"),
geography = str_replace(geography, ", NY", ""),
year = factor(year))
ny_eco_df %>%
mutate(text_label = str_c("Year: ", year, "\nMedian Household Income: $", household_income_by_race,
"\nMargin of error: ± $", household_income_by_race_moe)) %>%
plot_ly(
x = ~year, y = ~household_income_by_race, color = ~geography, text = ~text_label,
alpha = 0.5, type = "scatter", mode = "markers+lines", colors = "viridis",error_y = ~list(array = household_income_by_race_moe)) %>%
layout(
title = "Median Household Income: New York vs. The U.S",
xaxis = list(title = "Year"),
yaxis = list(title = "Median Household Income"))
ny_county_df =
read_csv("./data/NCHS_-_Drug_Poisoning_Mortality_by_County__United_States.csv") %>%
janitor::clean_names() %>%
filter(state %in% "New York",
year >= 2015) %>%
select(year, county, population, model_based_death_rate) %>%
rename(death_rate = model_based_death_rate) %>%
mutate(
county = str_replace(county, "County, NY", "")) %>%
mutate(year = factor(year),
county = str_to_lower(county)) %>%
filter(str_detect(county, "bronx|queens|kings|new york|richmond")) %>%
relocate(county)
ny_county_df
## # A tibble: 20 × 4
## county year population death_rate
## <chr> <fct> <dbl> <dbl>
## 1 "bronx " 2015 1440726 15.5
## 2 "bronx " 2016 1445237 22.2
## 3 "bronx " 2017 1439725 24.3
## 4 "bronx " 2018 1432132 26.8
## 5 "kings " 2015 2610607 9.68
## 6 "kings " 2016 2611232 12.8
## 7 "kings " 2017 2596385 14.5
## 8 "kings " 2018 2582830 11.8
## 9 "new york " 2015 1636713 10.6
## 10 "new york " 2016 1636261 16.3
## 11 "new york " 2017 1629780 15.7
## 12 "new york " 2018 1628701 17.1
## 13 "queens " 2015 2307614 7.42
## 14 "queens " 2016 2309032 11.3
## 15 "queens " 2017 2296865 12.3
## 16 "queens " 2018 2278906 11.6
## 17 "richmond " 2015 472521 17.8
## 18 "richmond " 2016 474214 25.4
## 19 "richmond " 2017 475516 23.7
## 20 "richmond " 2018 476179 24.7
data(county.fips)
nyc_fip = county.fips %>%
filter(str_detect(polyname, "new york")) %>%
mutate(
polyname = str_replace(polyname, "new york,", "")) %>%
filter(str_detect(polyname, "bronx|queens|kings|new york|richmond")) %>%
as.tibble() %>%
slice(rep(1:n(), each = 4)) %>%
select(fips)
ny_county_df_2015 =
bind_cols(ny_county_df, nyc_fip) %>%
filter(year == 2015)
highlight_county = function(county_fips)
{
data(county.map, package="choroplethrMaps", envir=environment())
df = county.map[county.map$region %in% county_fips, ]
geom_polygon(data=df, aes(long, lat, group = group), color = "yellow", fill = NA, size = 1)
}
add_text_county = function(county_fips){
data(county.map, package="choroplethrMaps", envir=environment())
df = county.map[county.map$region %in% county_fips, ]
#geom_text(data=df, aes(mean(long), mean(lat), label = paste0(str_to_title(pull(county_fips, county)), " County\n", pull(county_fips, death_rate))), color = "white")
geom_label(data=df, aes(mean(long), mean(lat), label = paste0(str_to_title(pull(county_fips, county)), " County\nDR: ", round(pull(county_fips, death_rate),2))), fill = "white", size = 3)
}
ny_deathrate_2015 =
ny_county_df_2015 %>%
select(-population) %>%
rename(region = fips,
value = death_rate) %>%
select(value, region)
county_choropleth(ny_deathrate_2015, title = "Death rates of Counties in New York City in 2015",
legend = "Death rates",
county_zoom = ny_deathrate_2015$region) +
highlight_county(ny_county_df_2015[which.max(pull(ny_county_df_2015, death_rate)),]) +
add_text_county(ny_county_df_2015[which.max(pull(ny_county_df_2015, death_rate)),])
ny_county_pop_2015 =
ny_county_df_2015 %>%
select(population, fips) %>%
rename(value = population,
region = fips)
data(county.regions)
nyc_county_names = c("kings", "bronx", "new york", "queens", "richmond")
nyc_county_fips = county.regions %>%
filter(state.name == "new york" & county.name %in% nyc_county_names) %>%
select(region)
county_choropleth(ny_county_pop_2015,
title = "Population of Counties in New York City in 2015",
legend = "Population",
county_zoom = nyc_county_fips$region)
Percent with drugs specified for each month from 2015-2021 are high enough, all above 98%
drug_percent_specified =
nyc_df %>%
ungroup() %>%
select(year, month, indicator, data_value) %>%
filter(indicator == "Percent with drugs specified")